clear all
set more off
cap log close

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Graph: Time of First Coaching
********************************************************************************
***** Latest update: 18-01-2020
********************************************************************************
*** WARNING: TAKES MORE THAN AN HOUR TO RUN.
********************************************************************************

*global filepath "C:\JPEMicReplication"
log using "$filepath/Logs/08Peereffort.log", replace
	
	local q=0
	
	while `q'<=10 {
	import excel "$filepath\Data\Using databases\feedback_`q'.xlsx", firstrow


	*** Local variables *** 
	local afronding=0.00000001

	*** Preliminaries

	* Drop columns that are not relevant at the moment 	
	drop events_per_1000km_intouro_a
	drop events_per_1000km_iris_a
	drop events_per_1000km_vdl_a
	drop events_per_1000km_intouro_b
	drop events_per_1000km_iris_b
	drop events_per_1000km_vdl_b
	drop events_per_1000km_intouro_c
	drop events_per_1000km_iris_c
	drop events_per_1000km_vdl_c
	drop km_per_lt_intouro_d
	drop km_per_lt_vdl_d
	drop score_intouro_a
	drop score_iris_a
	drop score_vdl_a
	drop score_intouro_b
	drop score_iris_b
	drop score_vdl_b
	drop score_intouro_c
	drop score_iris_c
	drop score_vdl_c
	drop score_intouro_d
	drop score_vdl_d
	
	
	gen treatment2=. 
	replace treatment2=1 if treatment=="T1"
	replace treatment2=2 if treatment=="T2"
	replace treatment2=3 if treatment=="T3"
	replace treatment2=4 if treatment=="T4"

	drop treatment
	rename treatment2 treatment
	
gen Interventie="0p/0n" if  treatment==1
replace Interventie="0p/1n" if  treatment==2
replace Interventie="1p/1n" if  treatment==3
replace Interventie="0p/3n" if  treatment==4

	gen female=.
	replace female=0 if gender=="M"
	replace female=1 if gender=="V"

	tab female gender

	gen absent=-9
	replace absent=0 if total_score~=.
	replace absent=1 if total_score==.
	count if absent==1
	** Check: no absent with value "-9" left? 
	tab absent
	** Check= OK!
	
	replace  km_vdl_a=. if absent==1
	replace  km_iris_a=. if absent==1
	replace  km_intouro_a=. if absent==1 
	replace  total_km_a=. if absent==1 
	replace  km_intouro_b=. if absent==1 
	replace  km_vdl_b=. if absent==1 
	replace  km_iris_b=. if absent==1 
	replace  total_km_b=. if absent==1 
	replace  km_iris_c=. if absent==1 
	replace  km_vdl_c=. if absent==1 
	replace  km_intouro_c=. if absent==1 
	replace  total_km_c=. if absent==1 
	replace  km_vdl_d =. if absent==1
	replace  km_intouro_d =. if absent==1
	replace  total_km_d=. if absent==1 
	replace  perc_vdl_a=. if absent==1 
	replace  perc_iris_a=. if absent==1 
	replace  perc_vdl_b=. if absent==1 
	replace  perc_iris_b=. if absent==1 
	replace  perc_intouro_c=. if absent==1 
	replace  perc_vdl_c=. if absent==1 
	replace  perc_vdl_d=. if absent==1 
	replace  perc_intouro_d=. if absent==1 
	 
	****************************************************************
	** Calculate weighted scores on each of the four dimensions: ***	
	** dimensions: 												 ***
	** a. Acceleration 							    			 ***
	** b. Braking 									 			 ***
	** c. Cornering 											 ***
	** d. Fuel Economy											 ***
	****************************************************************


	** Acceleration

	foreach var of varlist km_iris_a km_vdl_a km_intouro_a {
	  generate weight`var' = `var' / total_km_a
	  }

	** Braking

	foreach var of varlist km_iris_b km_vdl_b km_intouro_b {
	  generate weight`var' = `var' / total_km_b
	  }

	** Cornering

	foreach var of varlist km_intouro_c km_vdl_c km_iris_c {
	  generate weight`var' = `var' / total_km_c
	  }


	** Fuel consumption					
	foreach var of varlist km_intouro_d km_vdl_d {
	  generate weight`var' = `var' / total_km_d
	  }

	** Overall scores

	* Overall score on acceleration
	
	gen weighted_perc_score_iris_a= weightkm_iris_a * perc_iris_a
	gen weighted_perc_score_intouro_a= weightkm_intouro_a * perc_intouro_a
	gen weighted_perc_score_vdl_a= weightkm_vdl_a * perc_vdl_a
	egen perc_score_a=rowtotal(weighted_perc_score_iris_a weighted_perc_score_intouro_a weighted_perc_score_vdl_a)
	replace perc_score_a=. if score_a==.

	* Overall score on Braking 

	gen weighted_perc_score_iris_b= weightkm_iris_b * perc_iris_b
	gen weighted_perc_score_intouro_b= weightkm_intouro_b * perc_intouro_b
	gen weighted_perc_score_vdl_b= weightkm_vdl_b * perc_vdl_b
	egen perc_score_b=rowtotal(weighted_perc_score_iris_b weighted_perc_score_intouro_b weighted_perc_score_vdl_b)
	replace perc_score_b=. if score_b==.
	
	* Overall score on Cornering 

	gen weighted_perc_score_iris_c= weightkm_iris_c * perc_iris_c
	gen weighted_perc_score_intouro_c= weightkm_intouro_c * perc_intouro_c
	gen weighted_perc_score_vdl_c= weightkm_vdl_c * perc_vdl_c
	egen perc_score_c=rowtotal(weighted_perc_score_iris_c weighted_perc_score_intouro_c weighted_perc_score_vdl_c)
	replace perc_score_c=. if score_c==.

	* Overall score on fuel consumption 

	gen perc_score_d= (weightkm_intouro_d * perc_intouro_d) + (weightkm_vdl_d * perc_vdl_d)
	replace perc_score_d=. if score_d==.

	* list of all driver id's
	levelsof driver_id, local(lijstdrivers)
	local nrdrivers: word count `lijstdrivers'

	levelsof location, local(lijstlocaties)
	local nrlocaties: word count `lijstlocaties'
	gen byte locnumber=-9
	local j=1
	** Identify each location with a unique number 
	foreach i of local lijstlocaties{ 
	replace locnumber=`j' if location=="`i'"
	local j= `j' + 1
	}
	* Check that no "locnumber" still equals "-9"
	tab locnumber 
	** Check = OK!

	** Determine which treatment-location combination has most drivers (this puts upper bound on number of peers);
	** this number is stored as local variable "maxpeer".
	bysort locnumber treatment: gen int reprobsloc=_n
	sum reprobsloc
	tempvar int maxpeer
	gen int `maxpeer'=r(max)
	di `maxpeer'
	drop reprobsloc

	** pretty unpractical way to find maximum value "maxdrivernr" in lijstdrivers, but "local maxdrivernr = word(r(levels), -1)" did not work
	local maxdrivernr = 1
	levelsof driver_id, local(lijstdrivers)
	foreach i of local lijstdrivers { 
	local maxdrivernr = `i'
	}
	di `maxdrivernr'
	
	local nrdrivers: word count `lijstdrivers'
	di `nrdrivers' 

	sort driver_id
	local i=1
quietly {

** loops are split in part a and b because Stata only allows for 5000 variables.

** Part a.
	foreach i of local lijstdrivers {  
	gen Trhulp`i'=.
	gen Lochulp`i'=.
	gen idhulp`i'=.
	gen afwezighulp`i'=.
	replace idhulp`i' =driver_id if driver_id==`i'
	replace Trhulp`i' =treatment if driver_id==`i'
	replace Lochulp`i'=locnumber if driver_id==`i'
	replace afwezighulp`i'=absent if driver_id==`i'
	}	

	foreach i of local lijstdrivers {  
	egen driverid_`i'=max(idhulp`i')
	egen Treatment_`i'=max(Trhulp`i')
	egen Location_`i'=max(Lochulp`i')
	egen Afwezig_`i'=max(afwezighulp`i')
	}	
drop  Trhulp1-afwezighulp`maxdrivernr'

** Part b.	
	foreach i of local lijstdrivers {  
	gen pscorehulp_a`i'=.
	gen pscorehulp_b`i'=.
	gen pscorehulp_c`i'=.
	gen pscorehulp_d`i'=.
	replace pscorehulp_a`i'=perc_score_a if driver_id==`i'
	replace pscorehulp_b`i'=perc_score_b if driver_id==`i'
	replace pscorehulp_c`i'=perc_score_c if driver_id==`i'
	replace pscorehulp_d`i'=perc_score_d if driver_id==`i'
	}
	foreach i of local lijstdrivers {  
	egen pscorea_`i'=max(pscorehulp_a`i')
	egen pscoreb_`i'=max(pscorehulp_b`i')
	egen pscorec_`i'=max(pscorehulp_c`i')
	egen pscored_`i'=max(pscorehulp_d`i')
	}	

	drop  pscorehulp_a1- pscorehulp_d`maxdrivernr'


	******************************************************
	** Determine peer group for each individual driver
	******************************************************

	local i=1
	while `i'<=`maxpeer' {
	gen peer_`i'=.
	gen pscorea_peer_`i'=.
	gen pscoreb_peer_`i'=.
	gen pscorec_peer_`i'=.
	gen pscored_peer_`i'=.
	local i= `i'+ 1
	}

	gen peercount=1
	** nr. of peers with score may differ per dimension (some drivers with score for a and b but no score for dimension c, say.)
	gen peercounta=1
	gen peercountb=1
	gen peercountc=1
	gen peercountd=1
	local rij=1
	sort driver_id
	** we define 'rij' because no 1-to-1 match drivers_id and rows!!!
	foreach i of local lijstdrivers {
		di `rij'
		local j=1
			foreach j of local lijstdrivers { 
				local k = peercount[`rij'] 
				if `k'>`maxpeer' {
					continue, break
				}
				replace peer_`k'= driverid_`j' if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & Afwezig_`j'==0)
				replace pscorea_peer_`k'=pscorea_`j' if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & Afwezig_`j'==0)
				replace pscoreb_peer_`k'=pscoreb_`j' if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & Afwezig_`j'==0)
				replace pscorec_peer_`k'=pscorec_`j' if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & Afwezig_`j'==0)
				replace pscored_peer_`k'=pscored_`j' if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & Afwezig_`j'==0)
				replace peercount = peercount +1 if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & Afwezig_`j'==0)
				replace peercounta = peercounta +1 if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & pscorea_`j'~=.)
				replace peercountb = peercountb +1 if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & pscoreb_`j'~=.)
				replace peercountc = peercountc +1 if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & pscorec_`j'~=.)
				replace peercountd = peercountd +1 if (driver_id==`i' & locnumber==Location_`j' & treatment==Treatment_`j' & pscored_`j'~=.)
			}
		local rij = `rij'+ 1
	}


	** At some locations, number of peer per treatment is less than 10: redefine for those locations peer group as ALL
	** drivers from this location. To this end "& peercount<11" is added to, and "treatment==Treatment_`j'" is removed from if-conditions!
	gen pc2=1
	gen pc2a=1
	gen pc2b=1
	gen pc2c=1
	gen pc2d=1
	local rij=1
	sort driver_id
	** we define 'rij' because no 1-to-1 match drivers_id and rows!!!
	foreach i of local lijstdrivers {
		di `rij'
		local j=1
			foreach j of local lijstdrivers { 
				local k = pc2[`rij'] 
				if `k'>`maxpeer' {
					continue, break
				}
				replace peer_`k'= driverid_`j' if (driver_id==`i' & locnumber==Location_`j' & peercount<11)
				replace pscorea_peer_`k'=pscorea_`j' if (driver_id==`i' & locnumber==Location_`j' & Afwezig_`j'==0 & peercount<11)
				replace pscoreb_peer_`k'=pscoreb_`j' if (driver_id==`i' & locnumber==Location_`j' & Afwezig_`j'==0 & peercount<11)
				replace pscorec_peer_`k'=pscorec_`j' if (driver_id==`i' & locnumber==Location_`j' & Afwezig_`j'==0 & peercount<11)
				replace pscored_peer_`k'=pscored_`j' if (driver_id==`i' & locnumber==Location_`j' & Afwezig_`j'==0 & peercount<11)
				replace pc2 = pc2 +1 if (driver_id==`i' & locnumber==Location_`j'  & Afwezig_`j'==0 & peercount<11)
				replace pc2a = pc2a +1 if (driver_id==`i' & locnumber==Location_`j'  & pscorea_`j'~=. & peercount<11)
				replace pc2b = pc2b +1 if (driver_id==`i' & locnumber==Location_`j'  & pscoreb_`j'~=. & peercount<11)
				replace pc2c = pc2c +1 if (driver_id==`i' & locnumber==Location_`j'  & pscorec_`j'~=. & peercount<11)
				replace pc2d = pc2d +1 if (driver_id==`i' & locnumber==Location_`j'  & pscored_`j'~=. & peercount<11)
			}
		local rij = `rij'+ 1
	}
}
	** CHECK:
	list location peercount  pc2 peercounta pc2a peercountb pc2b peercountc pc2c peercountd pc2d
	** CHECK = OK!

	*** For each driver, number of peers is max( peercount, pc2)-1 (incl. individual)
	gen npeers=max( peercount, pc2)-1
	gen npeersa=max( peercounta, pc2a)-1
	gen npeersb=max( peercountb, pc2b)-1
    gen npeersc=max( peercountc, pc2c)-1
	gen npeersd=max( peercountd, pc2d)-1
	
	drop peercount pc2a peercount pc2a peercountb pc2b peercountc pc2c peercountd pc2d 
	drop driverid_1-pscored_`maxdrivernr'

quietly {
	********************************************************************
	** Determine for each score A, B, C, D ranking within peer group ***
	********************************************************************
gen long rankpercA=.
gen long rankpercB=.
gen long rankpercC=.
gen long rankpercD=.
	
	replace rankpercA=1/npeersa if perc_score_a~=.
	replace rankpercB=1/npeersb if perc_score_b~=.
	replace rankpercC=1/npeersc if perc_score_c~=.
	replace rankpercD=1/npeersd if perc_score_d~=.
	** if-condition: exclude drivers who have no score on this dimension



	** create numlist such that for every row "i", "j" will run over all peers.
	levelsof npeers, local(listnpeers)
	local maxnpeers = word(r(levels), -1)
	di `maxnpeers'
	numlist  "1/`maxnpeers'"
	di `r(numlist)'
	local np = r(numlist)
	
	
	sort driver_id
	foreach i of local lijstdrivers {
	foreach j of local np 	{
				replace rankpercA=rankpercA + 1/npeersa  if (driver_id==`i' & perc_score_a> pscorea_peer_`j' & perc_score_a~=.)
				replace rankpercB=rankpercB + 1/npeersb  if (driver_id==`i' & perc_score_b> pscoreb_peer_`j' & perc_score_b~=.)
				replace rankpercC=rankpercC + 1/npeersc  if (driver_id==`i' & perc_score_c> pscorec_peer_`j' & perc_score_c~=.)
				replace rankpercD=rankpercD + 1/npeersd  if (driver_id==`i' & perc_score_d> pscored_peer_`j' & perc_score_d~=.)
				** increase rank with 1/npeers for those dimensions for which driver scores lower (ie higher percentage) than peer.
				** if-conditions: 	1st: to assure we only change values for driver with id "i" in this iteration
				** 					2nd: percentile driver only increases if perc_score on this dimension is higher than of peer "j"
				**					3rd: exclude drivers who have no score on this dimension
		}
	}

}
list driver_id if absent==1


*** Percentile ranking per category

*** Treatments: preliminaries

gen bottom50_a=0
gen bottom50_b=0
gen bottom50_c=0
gen bottom50_d=0

** Correction "0.99/npeers" is added to be on conservative side: such that in group of, say, 35, nr. 18th does not belong to bottom 50%
tempvar hulpiea hulpieb hulpiec hulpied
gen long `hulpiea'=. 
gen long `hulpieb'=.
gen long `hulpiec'=.
gen long `hulpied'=.
replace `hulpiea'= 0.50 + (0.99/npeersa)
replace `hulpieb'= 0.50 + (0.99/npeersb)
replace `hulpiec'= 0.50 + (0.99/npeersc)
replace `hulpied'= 0.50 + (0.99/npeersd)
replace bottom50_a=1 if rankpercA>`hulpiea'
replace bottom50_a=. if rankpercA==.

replace bottom50_b=1 if rankpercB>`hulpieb' 
replace bottom50_b=. if rankpercB==.

replace bottom50_c=1 if rankpercC>`hulpiec'
replace bottom50_c=. if rankpercC==.

replace bottom50_d=1 if rankpercD>`hulpied'
replace bottom50_d=. if rankpercD==.


gen top25_a=0
gen top25_b=0
gen top25_c=0
gen top25_d=0

** correction e.g. to have individual ranked 9/33=.27273 in
replace `hulpiea'= 0.25 + (0.99/npeersa)
replace `hulpieb'= 0.25 + (0.99/npeersb)
replace `hulpiec'= 0.25 + (0.99/npeersc)
replace `hulpied'= 0.25 + (0.99/npeersd)

replace top25_a=1 if rankpercA<=`hulpiea'
replace top25_a=. if rankpercA==.

replace top25_b=1 if rankpercB<=`hulpieb'
replace top25_b=. if rankpercB==. 

replace top25_c=1 if rankpercC<=`hulpiec'
replace top25_c=. if rankpercC==. 

replace top25_d=1 if rankpercD<=`hulpied'
replace top25_d=. if rankpercD==. 

**************************************

rename rankpercA rankpercA_`q'
rename rankpercB rankpercB_`q'
rename rankpercC rankpercC_`q'
rename rankpercD rankpercD_`q'

local drivedim "a b c d"
foreach x of local drivedim {
rename bottom50_`x' bottom50_`q'_`x'
rename top25_`x' top25_`q'_`x'
}

preserve 
keep driver_id rankpercA_`q' rankpercB_`q' rankpercC_`q' rankpercD_`q'
save "$filepath\Data\Using databases\Incidence check\feed_incidenceRANK_`q'", replace
restore

keep driver_id bottom50_`q'_* top25_`q'_*
save "$filepath\Data\Using databases\Incidence check\feed_incidence_`q'", replace
clear
local q=`q'+1
}
set more off

use "$filepath\Data\Using databases\Incidence check\feed_incidence_0.dta"

sort driver_id
foreach x of numlist 1(1)10{
joinby driver_id using "$filepath\Data\Using databases\Incidence check\feed_incidence_`x'.dta", unmatched(master)
drop _merge
}


sort driver_id
foreach x of numlist 0(1)10{
joinby driver_id using "$filepath\Data\Using databases\Incidence check\feed_incidenceRANK_`x'.dta", unmatched(master)
drop _merge
}

rename driver_id chauf_nr_rug 

joinby chauf_nr_rug using "$filepath\Data\Using databases\randomization.dta", unmatched(master)
drop _merge

order chauf_nr_rug treatment

levelsof chauf_nr_rug, local(level_drivers)

local drivedim "a b c"

foreach x of local drivedim {
gen bottom50_`x'=0
gen bottom50_`x'_rounds=0
gen bottom50_`x'_share=0
gen top25_`x'=0
gen top25_`x'_rounds=0
gen top25_`x'_share=0
}

foreach x of local level_drivers {
foreach y of numlist 0(1)10 {
foreach var of local drivedim {
sum bottom50_`y'_`var' if chauf_nr_rug==`x' & bottom50_`y'_`var'!=.
local `y'_`var'_`x'=r(mean)
count if (bottom50_`y'_`var'==1 | bottom50_`y'_`var'==0) & chauf_nr_rug==`x'
local bottomcount_`y'_`var'_`x'=r(N)
sum top25_`y'_`var' if chauf_nr_rug==`x' & top25_`y'_`var'!=.
local top_`y'_`var'_`x'=r(mean)
count if (top25_`y'_`var'==1 | top25_`y'_`var'==0) & chauf_nr_rug==`x'
local topcount_`y'_`var'_`x'=r(N)
replace bottom50_`var'=bottom50_`var'+``y'_`var'_`x'' if chauf_nr_rug==`x' & bottom50_`y'_`var'!=.
replace bottom50_`var'_rounds=bottom50_`var'_rounds+`bottomcount_`y'_`var'_`x'' if chauf_nr_rug==`x'
replace bottom50_`var'_share=100*(bottom50_`var' / bottom50_`var'_rounds) if chauf_nr_rug==`x'
replace top25_`var'=top25_`var'+`top_`y'_`var'_`x'' if chauf_nr_rug==`x' & top25_`y'_`var'!=.
replace top25_`var'_rounds=top25_`var'_rounds+`topcount_`y'_`var'_`x'' if chauf_nr_rug==`x'
replace top25_`var'_share=100*(top25_`var' / top25_`var'_rounds) if chauf_nr_rug==`x'
}
}
}


save "$filepath\Data\Using databases\Incidence check\FullDataRelPerformanceAndFeedback", replace
